diff --git a/assets/src/bundles/admin/deposit.js b/assets/src/bundles/admin/deposit.js
--- a/assets/src/bundles/admin/deposit.js
+++ b/assets/src/bundles/admin/deposit.js
@@ -1,17 +1,23 @@
/**
- * Copyright (C) 2018-2021 The Software Heritage developers
+ * Copyright (C) 2018-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
function genSwhLink(data, type) {
-  if (type === 'display') {
-    if (data && data.startsWith('swh')) {
-      const browseUrl = Urls.browse_swhid(data);
-      const formattedSWHID = data.replace(/;/g, ';<br/>');
-      return `<a href="${browseUrl}">${formattedSWHID}</a>`;
-    }
+  // When rendering for display, wrap a value starting with 'swh' in an anchor
+  // pointing at the URL returned by Urls.browse_swhid, with a line break
+  // inserted after every ';' of the label. Any other render type, or a value
+  // that is empty or does not start with 'swh', is returned untouched.
+  if (type === 'display' && data && data.startsWith('swh')) {
+    const browseUrl = Urls.browse_swhid(data);
+    const formattedSWHID = data.replace(/;/g, ';<br/>');
+    return `<a href="${browseUrl}">${formattedSWHID}</a>`;
+  }
+  return data;
+}
+
+function genLink(data, type) {
+  // When rendering for display, turn a non-empty value into an anchor whose
+  // target and label are both the encodeURI-escaped value; other render types
+  // and empty values fall through to the raw data.
+  // NOTE(review): encodeURI leaves the URI scheme untouched, so a value such
+  // as `javascript:...` would still become a clickable link -- confirm the
+  // `uri` field is validated before it reaches this renderer.
+  if (type === 'display' && data) {
+    const sData = encodeURI(data);
+    return `<a href="${sData}">${sData}</a>`;
}
return data;
}
@@ -55,23 +61,14 @@
name: 'id'
},
{
- data: 'swhid_context',
- name: 'swhid_context',
+ data: 'type',
+ name: 'type'
+ },
+ {
+ data: 'uri',
+ name: 'uri',
render: (data, type, row) => {
- if (data && type === 'display') {
- const originPattern = ';origin=';
- const originPatternIdx = data.indexOf(originPattern);
- if (originPatternIdx !== -1) {
- let originUrl = data.slice(originPatternIdx + originPattern.length);
- const nextSepPattern = ';';
- const nextSepPatternIdx = originUrl.indexOf(nextSepPattern);
- if (nextSepPatternIdx !== -1) { /* Remove extra context */
- originUrl = originUrl.slice(0, nextSepPatternIdx);
- }
- return `${originUrl}`;
- }
- }
- return data;
+ return genLink(data, type);
}
},
{
diff --git a/cypress/integration/deposit-admin.spec.js b/cypress/integration/deposit-admin.spec.js
--- a/cypress/integration/deposit-admin.spec.js
+++ b/cypress/integration/deposit-admin.spec.js
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2020-2021 The Software Heritage developers
+ * Copyright (C) 2020-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
@@ -14,30 +14,36 @@
responseDeposits = [
{
'id': 614,
+ 'type': 'code',
'external_id': 'ch-de-1',
'reception_date': '2020-05-18T13:48:27Z',
'status': 'done',
'status_detail': null,
'swhid': 'swh:1:dir:ef04a768',
- 'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/'
+ 'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/',
+ 'uri': 'https://w.s.o/c-d-1'
},
{
'id': 613,
+ 'type': 'code',
'external_id': 'ch-de-2',
'reception_date': '2020-05-18T11:20:16Z',
'status': 'done',
'status_detail': null,
'swhid': 'swh:1:dir:181417fb',
- 'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/'
+ 'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/',
+ 'uri': 'https://w.s.o/c-d-2'
},
{
'id': 612,
+ 'type': 'code',
'external_id': 'ch-de-3',
'reception_date': '2020-05-18T11:20:16Z',
'status': 'rejected',
'status_detail': 'incomplete deposit!',
'swhid': null,
- 'swhid_context': null
+ 'swhid_context': null,
+ 'uri': null
}
];
// those are computed from the
@@ -87,6 +93,8 @@
assert.isNotNull(deposit);
assert.isNotNull(responseDeposit);
expect(deposit.id).to.be.equal(responseDeposit['id']);
+ expect(deposit.uri).to.be.equal(responseDeposit['uri']);
+ expect(deposit.type).to.be.equal(responseDeposit['type']);
expect(deposit.external_id).to.be.equal(responseDeposit['external_id']);
expect(deposit.status).to.be.equal(responseDeposit['status']);
expect(deposit.status_detail).to.be.equal(responseDeposit['status_detail']);
diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py
--- a/swh/web/admin/deposit.py
+++ b/swh/web/admin/deposit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2021 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -13,7 +13,11 @@
from swh.web.admin.adminurls import admin_route
from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION
-from swh.web.common.utils import get_deposits_list
+from swh.web.common.utils import (
+ get_deposits_list,
+ parse_swh_deposit_origin,
+ parse_swh_metadata_provenance,
+)
def _can_list_deposits(user):
@@ -70,9 +74,11 @@
data = paginator.page(page).object_list
table_data["recordsTotal"] = deposits_count
table_data["recordsFiltered"] = len(deposits)
- table_data["data"] = [
- {
+ data_list = []
+ for d in data:
+ data_dict = {
"id": d["id"],
+ "type": d["type"],
"external_id": d["external_id"],
"reception_date": d["reception_date"],
"status": d["status"],
@@ -80,13 +86,39 @@
"swhid": d["swhid"],
"swhid_context": d["swhid_context"],
}
- for d in data
- ]
+ provenance = None
+ raw_metadata = d["raw_metadata"]
+ # Try to determine provenance out of the raw metadata
+ if raw_metadata and d["type"] == "meta": # metadata provenance
+ provenance = parse_swh_metadata_provenance(d["raw_metadata"])
+ elif raw_metadata and d["type"] == "code":
+ provenance = parse_swh_deposit_origin(raw_metadata)
+
+ if not provenance and d["origin_url"]:
+ provenance = d["origin_url"]
+
+ # Finally, if still not found, we determine uri using the swhid
+ if not provenance and d["swhid_context"]:
+ # Trying to compute the origin as we did before in the js
+ from swh.model.swhids import QualifiedSWHID
+
+ swhid = QualifiedSWHID.from_string(d["swhid_context"])
+ provenance = swhid.origin
+
+ data_dict["uri"] = provenance # could be None
+
+ # This could be large. As this is not displayed yet, drop it to avoid
+ # cluttering the data dict
+ data_dict.pop("raw_metadata", None)
+
+ data_list.append(data_dict)
+
+ table_data["data"] = data_list
except Exception as exc:
sentry_sdk.capture_exception(exc)
- table_data["error"] = (
- "An error occurred while retrieving " "the list of deposits !"
- )
+ table_data[
+ "error"
+ ] = "An error occurred while retrieving the list of deposits !"
return JsonResponse(table_data)
diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py
--- a/swh/web/common/utils.py
+++ b/swh/web/common/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,6 +8,7 @@
import re
from typing import Any, Dict, List, Optional
import urllib.parse
+from xml.etree import ElementTree
from bs4 import BeautifulSoup
from docutils.core import publish_parts
@@ -447,3 +448,69 @@
request_path = resolve(request.path_info)
args = {**request_path.kwargs, **request.GET.dict()}
return redirect(reverse(new_route, query_params=args), permanent=permanent,)
+
+
+# Prefix -> namespace URI mapping passed to ElementTree path lookups when
+# parsing deposit metadata documents.
+NAMESPACES = {
+    "swh": "https://www.softwareheritage.org/schema/2018/deposit",
+    "schema": "http://schema.org/",
+}
+
+
+def parse_swh_metadata_provenance(raw_metadata: str) -> Optional[str]:
+    """Parse swh metadata-provenance out of the raw metadata deposit. If found, returns the
+    value, None otherwise.
+
+    .. code-block:: xml
+
+        <swh:deposit>
+          <swh:metadata-provenance>
+            <schema:url>https://example.org/metadata/url</schema:url>
+          </swh:metadata-provenance>
+        </swh:deposit>
+
+    Args:
+        raw_metadata: raw metadata out of deposits received
+
+    Returns:
+        Either the metadata provenance url if any or None otherwise. None is
+        also returned when ``raw_metadata`` is not well-formed XML.
+
+    """
+    try:
+        metadata = ElementTree.fromstring(raw_metadata)
+    except ElementTree.ParseError:
+        # A document that cannot be parsed carries no usable provenance;
+        # answering "not found" lets the caller try its other sources.
+        return None
+    # NOTE(review): xml.etree does not protect against maliciously crafted
+    # XML (e.g. entity expansion) -- confirm raw_metadata is trusted.
+    url = metadata.findtext(
+        "swh:deposit/swh:metadata-provenance/schema:url", namespaces=NAMESPACES,
+    )
+    # findtext yields None when the element is missing and "" when it is
+    # empty; both are reported as None.
+    return url or None
+
+
+def parse_swh_deposit_origin(raw_metadata: str) -> Optional[str]:
+    """Parses <swh:create_origin> and <swh:add_to_origin> from metadata document,
+    if any. They are mutually exclusive and tested as such in the deposit.
+
+    .. code-block:: xml
+
+        <swh:deposit>
+          <swh:create_origin>
+            <swh:origin url='https://example.org/repo/software123/'/>
+          </swh:create_origin>
+        </swh:deposit>
+
+    .. code-block:: xml
+
+        <swh:deposit>
+          <swh:add_to_origin>
+            <swh:origin url='https://example.org/repo/software123/'/>
+          </swh:add_to_origin>
+        </swh:deposit>
+
+    Args:
+        raw_metadata: raw metadata out of deposits received
+
+    Returns:
+        The one not null if any, None otherwise. None is also returned when
+        ``raw_metadata`` is not well-formed XML.
+
+    """
+    try:
+        metadata = ElementTree.fromstring(raw_metadata)
+    except ElementTree.ParseError:
+        # A document that cannot be parsed carries no usable origin;
+        # answering "not found" lets the caller try its other sources.
+        return None
+    # NOTE(review): xml.etree does not protect against maliciously crafted
+    # XML (e.g. entity expansion) -- confirm raw_metadata is trusted.
+    for origin_tag in ["create_origin", "add_to_origin"]:
+        elt = metadata.find(
+            f"swh:deposit/swh:{origin_tag}/swh:origin[@url]", namespaces=NAMESPACES
+        )
+        if elt is None:
+            continue
+        url = elt.attrib["url"]
+        if url:
+            # An empty url attribute is treated the same as a missing origin.
+            return url
+    return None
diff --git a/swh/web/templates/admin/deposit.html b/swh/web/templates/admin/deposit.html
--- a/swh/web/templates/admin/deposit.html
+++ b/swh/web/templates/admin/deposit.html
@@ -30,19 +30,21 @@
<th>id</th>
- <th>origin</th>
+ <th>type</th>
+ <th>uri</th>
<th>reception date</th>
<th>status</th>
<th>status detail</th>
diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py
--- a/swh/web/tests/common/test_utils.py
+++ b/swh/web/tests/common/test_utils.py
@@ -1,10 +1,10 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
from base64 import b64encode
import datetime
+from os.path import join
from urllib.parse import quote
import pytest
@@ -314,3 +314,44 @@
def test_is_swh_web_production(request_factory):
request = request_factory.get("/", SERVER_NAME=SWH_WEB_SERVER_NAME)
assert utils.is_swh_web_production(request)
+
+
+@pytest.mark.parametrize(
+    "raw_metadata_file,expected_url",
+    [
+        ("raw-metadata-provenance.xml", "https://example.org/metadata/provenance"),
+        ("raw-metadata-no-swh.xml", None),
+    ],
+)
+def test_parse_swh_provenance(datadir, raw_metadata_file, expected_url):
+    """The provenance parser returns the expected url (or None) for each
+    metadata file read from the ``deposit`` data directory."""
+    with open(join(datadir, "deposit", raw_metadata_file), "r") as stream:
+        xml_document = stream.read()
+
+    assert utils.parse_swh_metadata_provenance(xml_document) == expected_url
+
+
+@pytest.mark.parametrize(
+    "raw_metadata_file,expected_url",
+    [
+        (
+            "raw-metadata-create-origin.xml",
+            "https://example.org/metadata/create-origin",
+        ),
+        (
+            "raw-metadata-add-to-origin.xml",
+            "https://example.org/metadata/add-to-origin",
+        ),
+        ("raw-metadata-no-swh.xml", None),
+    ],
+)
+def test_parse_swh_origins(datadir, raw_metadata_file, expected_url):
+    """The origin parser returns the expected url (or None) for each
+    metadata file read from the ``deposit`` data directory."""
+    with open(join(datadir, "deposit", raw_metadata_file), "r") as stream:
+        xml_document = stream.read()
+
+    assert utils.parse_swh_deposit_origin(xml_document) == expected_url
diff --git a/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml b/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml
@@ -0,0 +1,13 @@
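+<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+       xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+  <title>Awesome Compiler</title>
+  <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+  <author>dudess</author>
+  <swh:deposit>
+    <swh:add_to_origin>
+      <swh:origin url="https://example.org/metadata/add-to-origin"/>
+    </swh:add_to_origin>
+  </swh:deposit>
+</entry>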
diff --git a/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml b/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml
@@ -0,0 +1,13 @@
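+<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+       xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+  <title>Awesome Compiler</title>
+  <id>urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a</id>
+  <author>dudess</author>
+  <swh:deposit>
+    <swh:create_origin>
+      <swh:origin url="https://example.org/metadata/create-origin"/>
+    </swh:create_origin>
+  </swh:deposit>
+</entry>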
diff --git a/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml b/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml
@@ -0,0 +1,7 @@
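+<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
+  <title>Awesome Compiler</title>
+  <id>urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a</id>
+  <author>dudess</author>
+</entry>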
diff --git a/swh/web/tests/resources/deposit/raw-metadata-provenance.xml b/swh/web/tests/resources/deposit/raw-metadata-provenance.xml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-provenance.xml
@@ -0,0 +1,14 @@
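+<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+       xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
+       xmlns:schema="http://schema.org/">
+  <title>Awesome Compiler</title>
+  <id>urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a</id>
+  <author>dudess</author>
+  <swh:deposit>
+    <swh:metadata-provenance>
+      <schema:url>https://example.org/metadata/provenance</schema:url>
+    </swh:metadata-provenance>
+  </swh:deposit>
+</entry>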